package com.larry.spark.rdd.transform

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demonstrates `groupByKey` on a pair RDD using a local Spark context.
 *
 * Builds a small in-memory RDD of `(String, Int)` pairs, groups the values
 * by key, and prints each resulting `(key, values)` pair.
 */
object RDD_Oper_GroupByKey {

  /**
   * Entry point: creates the Spark context, runs the grouping job and
   * stops the context afterwards.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // Group records by key with Spark's groupByKey.
    val conf = new SparkConf().setMaster("local[*]").setAppName("rdd")
    val sc = new SparkContext(conf)

    try {
      val rdd = sc.makeRDD(
        List(
          ("a", 1),
          ("a", 1),
          ("a", 1)
        )
      )
      // Grouping: all values that share a key end up in one Iterable.
      val rdd1: RDD[(String, Iterable[Int])] = rdd.groupByKey()
      rdd1.collect().foreach(println)
    } finally {
      // Stop the context even when the job above throws, so it is not leaked.
      sc.stop()
    }
  }
}
